import json
import re
from urllib.parse import quote

import requests
'''
Purpose: scrape Taobao product information (price and title) for a search keyword.

'''
# Request headers sent with every search request (passed to requests.get in
# getHTMLText).
#
# NOTE(review): 'authority', 'method', 'path' and 'scheme' match the HTTP/2
# pseudo-header names (":authority", ":method", ...) minus the leading colon;
# here they are transmitted as ordinary header fields, and 'path' is a fixed
# value that does not follow the URL actually requested. Presumably they were
# copied from a browser's network panel -- confirm whether they are needed.
#
# SECURITY(review): the 'cookie' value is a hard-coded login session. It is
# left untouched so behavior does not change, but it should be loaded from
# configuration/environment rather than committed to source control.
headers = {
    'authority': 's.taobao.com',
    'method': 'GET',
    'path': '/search?q=%E8%A1%A3%E6%9C%8D&imgfile=&js=1&stats_click=search_radio_all%3A1&initiative_id=staobaoz_20200827&ie=utf8',
    'scheme': 'https',
    'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
    'accept-encoding': 'gzip, deflate, br',
    'accept-language': 'zh-CN,zh;q=0.9,en;q=0.8',
    'cache-control': 'max-age=0',
    'cookie': '_samesite_flag_=true; cookie2=168cd428269b89a4815ccc11993d5e2e; t=287f91bc45396c0354c0329fc2ad68a0; _tb_token_=f479e0b83b079; cna=o5nLF/KzH3ICAXTsspNiQKRI; sgcookie=E05A1cf6rlEiOpd0ZbsJu; unb=2711135296; uc3=vt3=F8dCufXABjMTBs2Penw%3D&id2=UU8Jpoz505t1bg%3D%3D&lg2=VFC%2FuZ9ayeYq2g%3D%3D&nk2=2%2BCloNpMWktQlwY3; csg=c4bb5e2a; lgc=%5Cu8BE5id%5Cu5DF2%5Cu7ECF%5Cu4F5C%5Cu5E9F; cookie17=UU8Jpoz505t1bg%3D%3D; dnk=%5Cu8BE5id%5Cu5DF2%5Cu7ECF%5Cu4F5C%5Cu5E9F; skt=302e4a94117b77b4; existShop=MTU5ODUwNzUzOQ%3D%3D; uc4=id4=0%40U22OR2QD8qxhxWB3OOe7K4bDABRS&nk4=0%402Y79qgRImb9XJU1JH0xrtfJ3XT5OxYE%3D; tracknick=%5Cu8BE5id%5Cu5DF2%5Cu7ECF%5Cu4F5C%5Cu5E9F; _cc_=WqG3DMC9EA%3D%3D; _l_g_=Ug%3D%3D; sg=%E5%BA%9F63; _nk_=%5Cu8BE5id%5Cu5DF2%5Cu7ECF%5Cu4F5C%5Cu5E9F; cookie1=U7PDWJ1D7PVW%2F%2FtkBRFDNUrb%2FzUi1IhInjabgeHdx%2Fg%3D; enc=F0uCQQ4Xe7yWCzGLpEFTT8OoC%2BCuZUEc3mqCDUAcGZ80rtvIurKpQlpVMv0hT2jZI8ZM%2BKxgVb3JmRh5eZVUEA%3D%3D; xlly_s=1; hng=CN%7Czh-CN%7CCNY%7C156; mt=ci=14_1; thw=cn; uc1=cookie16=W5iHLLyFPlMGbLDwA%2BdvAGZqLg%3D%3D&cookie21=Vq8l%2BKCLjhS4UhJVbhgU&cookie15=U%2BGCWk%2F75gdr5Q%3D%3D&existShop=false&cookie14=UoTV5O9DnRzX0A%3D%3D&pas=0; JSESSIONID=5DC689C413690168A31FE96B027646ED; l=eBEbhiDIOUt_UPpCBOfZourza779SIRAguPzaNbMiOCP9Ufp50NcWZPoObL9CnGVh6WMR3RZ-DGWBeYBYIv4n5U62j-laskmn; tfstk=c-TOBR0NYufspy5pahnhGHd1UHchZ_kAd560kbp2o64Yw94Ai2YkyRUEfswOJXC..; isg=BJSUQa6ItwDgiCPMl5VsVu7aZdsG7bjXTeRG6S51JJ-iGTRjVvhMZx2bHRGB4fAv',
    # Was misspelled 'eferer', so no Referer header was ever sent.
    'referer': 'https://s.taobao.com/search?q=%E4%B9%A6%E5%8C%85',
    'sec-fetch-dest': 'document',
    'sec-fetch-mode': 'navigate',
    'sec-fetch-site': 'same-origin',
    'sec-fetch-user': '?1',
    'upgrade-insecure-requests': '1',
    'user-agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.4103.116 Safari/537.36'
}



def getHTMLText(url):
    """Download ``url`` and return the response body as text.

    The request is an HTTP GET sent with the module-level ``headers`` and a
    30-second timeout.

    Args:
        url: Address to fetch.

    Returns:
        The body decoded with the encoding detected from the response content,
        or the sentinel string ``"wrong1"`` when the request fails (connection
        problem, timeout, or an HTTP error status).
    """
    try:
        r = requests.get(url, timeout=30, headers=headers)
        r.raise_for_status()
    except requests.RequestException:
        # Only network/HTTP failures are mapped to the sentinel. A bare
        # ``except`` here would also hide KeyboardInterrupt and coding errors.
        return "wrong1"
    # Prefer the encoding guessed from the body over the one declared in the
    # response headers.
    r.encoding = r.apparent_encoding
    return r.text

def parsePage(ilt, html):
    """Extract price/title pairs from a result page and append them to ``ilt``.

    Every ``"view_price":"..."`` field is paired, in order of appearance, with
    the ``"raw_title":"..."`` field at the same position. Pairing stops at the
    shorter of the two lists.

    Args:
        ilt: List that receives one ``[price, title]`` entry per product.
            Both elements are strings. The list is modified in place.
        html: Page source to scan. Anything that is not a string is ignored.

    Returns:
        None.
    """
    if not isinstance(html, str):
        return

    # Capture groups pull out just the value. Splitting the match on ':' cut
    # off any title that itself contained a colon.
    prices = re.findall(r'"view_price":"([\d.]*)"', html)
    # The title pattern accepts backslash escapes (including \") so an escaped
    # quote does not end the match early.
    raw_titles = re.findall(r'"raw_title":"((?:[^"\\\n]|\\.)*)"', html)

    for price, raw_title in zip(prices, raw_titles):
        try:
            # Decode escapes such as \uXXXX and \" without eval(), which would
            # execute whatever expression the remote page supplied.
            title = json.loads('"' + raw_title + '"', strict=False)
        except ValueError:
            # Not a valid JSON string body; keep the text as scraped.
            title = raw_title
        ilt.append([price, title])

def printGoodsList(ilt):
    """Print each collected product on its own line, numbered from 1.

    Args:
        ilt: Sequence of records. Element 0 of each record is printed as the
            price and element 1 as the title.

    Returns:
        None.
    """
    for position, goods in enumerate(ilt, start=1):
        print(position, goods[0], goods[1])


def main(key_word='衣服', depth=11):
    """Scrape ``depth`` result pages for ``key_word`` and print the products.

    Each page is fetched with getHTMLText and parsed with parsePage into a
    shared list, which is printed once with printGoodsList after all pages
    have been processed.

    Args:
        key_word: Search term. It is percent-encoded before being placed in
            the query string.
        depth: Number of result pages to request.

    Returns:
        None.
    """
    # The "s" offset advances by 44 per page.
    # NOTE(review): presumably 44 is the number of items per result page --
    # confirm.
    page_size = 44
    base_url = 'https://s.taobao.com/search?q=' + quote(key_word, safe='')
    infoList = []
    for page in range(depth):
        # Request this page's URL. Fetching the bare search URL every time
        # returned the first page on each iteration.
        page_url = base_url + "&s=" + str(page_size * page)
        try:
            html = getHTMLText(page_url)
            parsePage(infoList, html)
        except Exception as exc:
            # Best effort: one bad page must not stop the run, but report it
            # instead of swallowing the error silently.
            print("page {} skipped: {}".format(page + 1, exc))
    # Print once at the end. printGoodsList returns None, so wrapping it in
    # print() emitted a stray "None", and calling it inside the loop re-printed
    # the whole accumulated list for every page.
    printGoodsList(infoList)

# Run the scraper only when this file is executed as a script, so importing
# the module does not trigger network requests.
if __name__ == "__main__":
    main()